#Please use the below command to install plotly. It has been used in this project and will be required for the plots to be generated.
#pip install plotly==4.9.0
#Data files for this project was extracted from the government of India website for statistical information.
#Here we will be exploring the accidents.
import xlrd
import json
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import plotly.offline as po
import plotly.graph_objs as go
import plotly.express as px
def add_year_column(year,nofrows):
year_col = []
for a in nofrows:
year_col.append(year)
year_df = pd.Series(year_col)
return year_df
# Read Excel file for Natural and Unatural Causes with multiple sheets with Statewise statistics, Citywise statistics and Overall
# statistics for the years
loc = ('Natural and Unnatural.xlsx')
# To open Workbook
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)
sheet_list1 = wb.sheet_names()
a = 0
for i in range(20):
if(sheet.cell_value(i,0)) == 1.0:
a = i
break
df_input = pd.read_excel('Natural and Unnatural.xlsx', sheet_by_index=0, skiprows=a, nrows=37)
b=0
sheet_temp = wb.sheet_by_index(1)
for i in range(20):
chk = sheet_temp.cell_value(i,0).replace(" ","")
if chk == 'Year':
b = i
break
df_accident_naun = pd.read_excel('Natural and Unnatural.xlsx',sheet_name=sheet_list1[1], skiprows=b, nrows=16)
df_accident_naun=df_accident_naun.set_index('Year')
C=0
sheet_temp = wb.sheet_by_index(2)
for i in range(20):
chk = sheet_temp.cell_value(i,0).replace(" ","")
if chk == 'City':
c = i
break
df_accident_naun_city = pd.read_excel('Natural and Unnatural.xlsx',sheet_name=sheet_list1[2], skiprows=c, nrows=55)
# Dataframe to be generated for the statewise statistics for natural and unnatural causes
# The excel columns are parsed and renamed with identical coulumns to apporach the dataframe in an organized fashion
nat_column_lbl = ['Avalanche', 'Lightning','HeatStroke', 'Flood', 'ColdWeather', 'Cyclone',
'Starvation', 'Earthquake', 'Epidemic','TorrentialRains', 'OtherNatural']
unnat_column_lbl = ['AirCrash','StructureCollapse','Drowning','Electrocution',
'Explosion','Falls','FMAccidents','Fire','Firearms','SuddenDeath',
'KilledbyAnimal','MQDisaster','Poisioning','Stampede','Suffocation',
'Trafficaccident','OtherCauses','Unknowncauses']
states_list = ['AP','AR','AS','BH','CG','GA','GJ','HR','HP','JK','JH','KA','KL','MP','MH','MN','ML','MZ',
'NL','OR','PB','RJ','SK','TN','TS','TR','UP','UK','WB','AN','CH','DH','DD','DL','LD','PY']
df_complete = pd.DataFrame()
df_temp = pd.DataFrame()
years = [2008,2009,2010,2011,2012,2013,2014,2015]
cols0 = [2,3,11,19,27,35,43,51,59,67,75,83,91,99,107,115,123,131,139,147,155,163,171,179,
187,195,203,211,219,227,235,243]
cols1 = [2,4,12,20,28,36,44,52,60,68,76,84,92,100,108,116,124,132,140,148,156,164,172,180,
188,196,204,212,220,228,236,244]
cols2 = [2,5,13,21,29,37,45,53,61,69,77,85,93,101,109,117,125,133,141,149,157,165,173,181,
189,197,205,213,221,229,237,245]
cols3 = [2,6,14,22,30,38,46,54,62,70,78,86,94,102,110,118,126,134,142,150,158,166,174,182,
190,198,206,214,222,230,238,246]
cols4 = [2,7,15,23,31,39,47,55,63,71,79,87,95,103,111,119,127,135,143,151,159,167,175,183,
191,199,207,215,223,231,239,247]
cols5 = [2,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120,128,136,144,152,160,168,176,184,
192,200,208,216,224,232,240,248]
cols6 = [2,9,17,25,33,41,49,57,65,73,81,89,97,105,113,121,129,137,145,153,161,169,177,185,
193,201,209,217,225,233,241,249]
cols7 = [2,10,18,26,34,42,50,58,66,74,82,90,98,106,114,122,130,138,146,154,162,170,178,186,
194,202,210,218,226,234,242,250]
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6, cols7]
for i, j in zip(years,datacols):
df_temp = df_input[j]
year_df = add_year_column(i,np.arange(len(df_temp)))
df_temp.insert(0,'Year',year_df)
if i == 2008:
df_temp = df_temp.rename(columns={2:'State', 3:'Avalanche', 11:'Lightning',
19:'HeatStroke', 27:'Flood', 35:'ColdWeather', 43:'Cyclone',
51:'Starvation', 59:'Earthquake', 67:'Epidemic', 75:'TorrentialRains',
83:'Other',91:'NTotal',99:'AirCrash',107:'StructureCollapse',115:'Drowning',123:'Electrocution',
131:'Explosion',139:'Falls',147:'FMAccidents',155:'Fire',163:'Firearms',171:'SuddenDeath',
179:'KilledbyAnimal',187:'MQDisaster',195:'Poisioning',203:'Stampede',211:'Suffocation',
219:'Trafficaccident',227:'OtherCauses',235:'Unknowncauses',243:'UnTotal'})
elif i == 2009:
df_temp = df_temp.rename(columns={2:'State', 4:'Avalanche', 12:'Lightning',
20:'HeatStroke', 28:'Flood', 36:'ColdWeather', 44:'Cyclone',
52:'Starvation', 60:'Earthquake', 68:'Epidemic', 76:'TorrentialRains',
84:'Other',92:'NTotal',100:'AirCrash',108:'StructureCollapse',116:'Drowning',124:'Electrocution',
132:'Explosion',140:'Falls',148:'FMAccidents',156:'Fire',164:'Firearms',172:'SuddenDeath',
180:'KilledbyAnimal',188:'MQDisaster',196:'Poisioning',204:'Stampede',212:'Suffocation',
220:'Trafficaccident',228:'OtherCauses',236:'Unknowncauses',244:'UnTotal'})
elif i == 2010:
df_temp = df_temp.rename(columns={2:'State', 5:'Avalanche', 13:'Lightning',
21:'HeatStroke', 29:'Flood', 37:'ColdWeather', 45:'Cyclone',
53:'Starvation', 61:'Earthquake', 69:'Epidemic', 77:'TorrentialRains',
85:'Other',93:'NTotal',101:'AirCrash',109:'StructureCollapse',117:'Drowning',125:'Electrocution',
133:'Explosion',141:'Falls',149:'FMAccidents',157:'Fire',165:'Firearms',173:'SuddenDeath',
181:'KilledbyAnimal',189:'MQDisaster',197:'Poisioning',205:'Stampede',213:'Suffocation',
221:'Trafficaccident',229:'OtherCauses',237:'Unknowncauses',245:'UnTotal'})
elif i == 2011:
df_temp = df_temp.rename(columns={2:'State', 6:'Avalanche', 14:'Lightning',
22:'HeatStroke', 30:'Flood', 38:'ColdWeather', 46:'Cyclone',
54:'Starvation', 62:'Earthquake', 70:'Epidemic', 78:'TorrentialRains',
86:'Other',94:'NTotal',102:'AirCrash',110:'StructureCollapse',118:'Drowning',126:'Electrocution',
134:'Explosion',142:'Falls',150:'FMAccidents',158:'Fire',166:'Firearms',174:'SuddenDeath',
182:'KilledbyAnimal',190:'MQDisaster',198:'Poisioning',206:'Stampede',214:'Suffocation',
222:'Trafficaccident',230:'OtherCauses',238:'Unknowncauses',246:'UnTotal'})
elif i == 2012:
df_temp = df_temp.rename(columns={2:'State', 7:'Avalanche', 15:'Lightning',
23:'HeatStroke', 31:'Flood', 39:'ColdWeather', 47:'Cyclone',
55:'Starvation', 63:'Earthquake', 71:'Epidemic', 79:'TorrentialRains',
87:'Other',95:'NTotal',103:'AirCrash',111:'StructureCollapse',119:'Drowning',127:'Electrocution',
135:'Explosion',143:'Falls',151:'FMAccidents',159:'Fire',167:'Firearms',175:'SuddenDeath',
183:'KilledbyAnimal',191:'MQDisaster',199:'Poisioning',207:'Stampede',215:'Suffocation',
223:'Trafficaccident',231:'OtherCauses',239:'Unknowncauses',247:'UnTotal'})
elif i == 2013:
df_temp = df_temp.rename(columns={2:'State', 8:'Avalanche', 16:'Lightning',
24:'HeatStroke', 32:'Flood', 40:'ColdWeather', 48:'Cyclone',
56:'Starvation', 64:'Earthquake', 72:'Epidemic', 80:'TorrentialRains',
88:'Other',96:'NTotal',104:'AirCrash',112:'StructureCollapse',120:'Drowning',128:'Electrocution',
136:'Explosion',144:'Falls',152:'FMAccidents',160:'Fire',168:'Firearms',176:'SuddenDeath',
184:'KilledbyAnimal',192:'MQDisaster',200:'Poisioning',208:'Stampede',216:'Suffocation',
224:'Trafficaccident',232:'OtherCauses',240:'Unknowncauses',248:'UnTotal'})
elif i == 2014:
df_temp = df_temp.rename(columns={2:'State', 9:'Avalanche', 17:'Lightning',
25:'HeatStroke', 33:'Flood', 41:'ColdWeather', 49:'Cyclone',
57:'Starvation', 65:'Earthquake', 73:'Epidemic', 81:'TorrentialRains',
89:'Other',97:'NTotal',105:'AirCrash',113:'StructureCollapse',121:'Drowning',129:'Electrocution',
137:'Explosion',145:'Falls',153:'FMAccidents',161:'Fire',169:'Firearms',177:'SuddenDeath',
185:'KilledbyAnimal',193:'MQDisaster',201:'Poisioning',209:'Stampede',217:'Suffocation',
225:'Trafficaccident',233:'OtherCauses',241:'Unknowncauses',249:'UnTotal'})
elif i == 2015:
df_temp = df_temp.rename(columns={2:'State', 10:'Avalanche', 18:'Lightning',
26:'HeatStroke', 34:'Flood', 42:'ColdWeather', 50:'Cyclone',
58:'Starvation', 66:'Earthquake', 74:'Epidemic', 82:'TorrentialRains',
90:'Other',98:'NTotal',106:'AirCrash',114:'StructureCollapse',122:'Drowning',130:'Electrocution',
138:'Explosion',146:'Falls',154:'FMAccidents',162:'Fire',170:'Firearms',178:'SuddenDeath',
186:'KilledbyAnimal',194:'MQDisaster',202:'Poisioning',210:'Stampede',218:'Suffocation',
226:'Trafficaccident',234:'OtherCauses',242:'Unknowncauses',250:'UnTotal'})
df_complete = df_complete.append(df_temp)
df_complete.sort_values(by=['Year','State'], inplace=True)
df_state_naun = df_complete[df_complete['Year'] > 2008]
# Dataframe to be generated for the citywise statistics for natural and unnatural causes
# The excel columns are parsed and renamed with identical coulumns to apporach the dataframe in an organized fashion
df_city_acci_naun = pd.DataFrame()
df_temp2 = pd.DataFrame()
arr2 = ['Year','City','State','Avalanche', 'Lightning','HeatStroke', 'Flood', 'ColdWeather', 'Cyclone',
'Starvation', 'Earthquake', 'Epidemic','TorrentialRains', 'Other', 'NTotal',
'AirCrash','StructureCollapse','Drowning','Electrocution',
'Explosion','Falls','FMAccidents','Fire','Firearms','SuddenDeath',
'KilledbyAnimal','MQDisaster','Poisioning','Stampede','Suffocation',
'Trafficaccident','OtherCauses','Unknowncauses','UnTotal']
years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['City','State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8',
'2009.9', '2009.10', '2009.11', '2009.12', '2009.13', '2009.14', '2009.15', '2009.16', '2009.17',
'2009.18', '2009.19', '2009.20', '2009.21', '2009.22', '2009.23','2009.24', '2009.25', '2009.26',
'2009.27', '2009.28', '2009.29', '2009.30']
cols1 = ['City','State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8',
'2010.9', '2010.10', '2010.11', '2010.12', '2010.13', '2010.14', '2010.15', '2010.16', '2010.17',
'2010.18', '2010.19', '2010.20', '2010.21', '2010.22', '2010.23','2010.24', '2010.25', '2010.26',
'2010.27', '2010.28', '2010.29', '2010.30']
cols2 = ['City','State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8',
'2011.9', '2011.10', '2011.11', '2011.12', '2011.13', '2011.14', '2011.15', '2011.16', '2011.17',
'2011.18', '2011.19', '2011.20', '2011.21', '2011.22', '2011.23','2011.24', '2011.25', '2011.26',
'2011.27', '2011.28', '2011.29', '2011.30']
cols3 = ['City','State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8',
'2012.9', '2012.10', '2012.11', '2012.12', '2012.13', '2012.14', '2012.15', '2012.16', '2012.17',
'2012.18', '2012.19', '2012.20', '2012.21', '2012.22', '2012.23','2012.24', '2012.25', '2012.26',
'2012.27', '2012.28', '2012.29', '2012.30']
cols4 = ['City','State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8',
'2013.9', '2013.10', '2013.11', '2013.12', '2013.13', '2013.14', '2013.15', '2013.16', '2013.17',
'2013.18', '2013.19', '2013.20', '2013.21', '2013.22', '2013.23','2013.24', '2013.25', '2013.26',
'2013.27', '2013.28', '2013.29', '2013.30']
cols5 = ['City','State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8',
'2014.9', '2014.10', '2014.11', '2014.12', '2014.13', '2014.14', '2014.15', '2014.16', '2014.17',
'2014.18', '2014.19', '2014.20', '2014.21', '2014.22', '2014.23','2014.24', '2014.25', '2014.26',
'2014.27', '2014.28', '2014.29', '2014.30']
cols6 = ['City','State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8',
'2015.9', '2015.10', '2015.11', '2015.12', '2015.13', '2015.14', '2015.15', '2015.16', '2015.17',
'2015.18', '2015.19', '2015.20', '2015.21', '2015.22', '2015.23','2015.24', '2015.25', '2015.26',
'2015.27', '2015.28', '2015.29', '2015.30']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]
for i, j in zip(years,datacols):
df_temp2 = df_accident_naun_city[j]
year_df = add_year_column(i,np.arange(len(df_temp2)))
df_temp2.insert(0,'Year',year_df)
if i == 2009:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
elif i == 2010:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
elif i == 2011:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
elif i == 2012:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
elif i == 2013:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
elif i == 2014:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
elif i == 2015:
arr = df_temp2.columns
df_temp2 = df_temp2.rename(columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24],arr[25]: arr2[25],
arr[26]: arr2[26],arr[27]: arr2[27],arr[28]: arr2[28],arr[29]: arr2[29],arr[30]: arr2[30],
arr[31]: arr2[31],arr[32]: arr2[32],arr[33]: arr2[33]})
df_city_acci_naun = df_city_acci_naun.append(df_temp2)
df_city_acci_naun.sort_values(by=['Year','State','City'], inplace=True)
df_city_naun = df_city_acci_naun
df_city_acci_naun = df_city_acci_naun.set_index(['Year','State','City'])
#The accidental death statistics have a 3rd category - suicide. This is captured in a separate document and
#contains 4 sheets in one document.
suicide = ('Statewise-Suicide.xlsx')
# To open Workbook
wb2 = xlrd.open_workbook(suicide)
sheet_list = wb2.sheet_names()
a=0
sheet_temp = wb2.sheet_by_index(0)
for i in range(20):
chk = sheet_temp.cell_value(i,0).replace(" ","")
if chk == 'Year':
a = i
b=0
sheet_temp = wb2.sheet_by_index(1)
for i in range(20):
chk = sheet_temp.cell_value(i,0).replace(" ","")
if chk == 'Name':
b = i
c=0
sheet_temp = wb2.sheet_by_index(2)
for i in range(20):
chk = sheet_temp.cell_value(i,0).replace(" ","")
if chk == 'City':
c = i
d=0
sheet_temp = wb2.sheet_by_index(3)
for i in range(20):
chk = sheet_temp.cell_value(i,0).replace(" ","")
if chk == 'City':
d = i
# Suicide statistics by year
df_suicide_year = pd.read_excel('Statewise-Suicide.xlsx', sheet_by_index=0, skiprows=a)
arr = df_suicide_year.columns
arr2= ['Year', 'Upto14years', 'B1529years', 'B3044years', 'B4559years',
'Above60years', 'Male', 'Female', 'Transgender', 'Total',
'BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide',
'TotalC']
df_suicide_year = df_suicide_year.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11],arr[12]: arr2[12],arr[13]: arr2[13],arr[14]: arr2[14],arr[15]: arr2[15],
arr[16]: arr2[16],arr[17]: arr2[17],arr[18]: arr2[18],arr[19]: arr2[19],arr[20]: arr2[20],
arr[21]: arr2[21],arr[22]: arr2[22],arr[23]: arr2[23],arr[24]: arr2[24]})
# Suicide statistics by state and year with all causes with gender and age statistics included
df_suicide_state = pd.read_excel('Statewise-Suicide.xlsx', sheet_name=sheet_list[1], skiprows=b)
states_list = ['AP','AR','AS','BH','CG','GA','GJ','HR','HP','JK','JH','KA','KL','MP','MH','MN','ML','MZ',
'NL','OR','PB','RJ','SK','TN','TS','TR','UP','UK','WB','AN','CH','DH','DD','DL','LD','PY']
df_comp_suicide = pd.DataFrame()
df_temp2 = pd.DataFrame()
years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['State','2009.9', '2009.10', '2009.11', '2009.12', '2009.13', '2009.14', '2009.15', '2009.16', '2009.17',
'2009.18', '2009.19', '2009.20', '2009.21', '2009.22', '2009.23']
cols1 = ['State','2010.9', '2010.10', '2010.11', '2010.12', '2010.13', '2010.14', '2010.15', '2010.16', '2010.17',
'2010.18', '2010.19', '2010.20', '2010.21', '2010.22', '2010.23']
cols2 = ['State','2011.9', '2011.10', '2011.11', '2011.12', '2011.13', '2011.14', '2011.15', '2011.16', '2011.17',
'2011.18', '2011.19', '2011.20', '2011.21', '2011.22', '2011.23']
cols3 = ['State','2012.9', '2012.10', '2012.11', '2012.12', '2012.13', '2012.14', '2012.15', '2012.16', '2012.17',
'2012.18', '2012.19', '2012.20', '2012.21', '2012.22', '2012.23']
cols4 = ['State','2013.9', '2013.10', '2013.11', '2013.12', '2013.13', '2013.14', '2013.15', '2013.16', '2013.17',
'2013.18', '2013.19', '2013.20', '2013.21', '2013.22', '2013.23']
cols5 = ['State','2014.9', '2014.10', '2014.11', '2014.12', '2014.13', '2014.14', '2014.15', '2014.16', '2014.17',
'2014.18', '2014.19', '2014.20', '2014.21', '2014.22', '2014.23']
cols6 = ['State','2015.9', '2015.10', '2015.11', '2015.12', '2015.13', '2015.14', '2015.15', '2015.16', '2015.17',
'2015.18', '2015.19', '2015.20', '2015.21', '2015.22', '2015.23']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]
for i, j in zip(years,datacols):
df_temp2 = df_suicide_state[j]
year_df = add_year_column(i,np.arange(len(df_temp2)))
df_temp2.insert(0,'Year',year_df)
if i == 2009:
df_temp2 = df_temp2.rename(columns={'State':'State','2009.9':'BankruptcyorIndebtness', '2009.10':'Illness',
'2009.11':'DeathofDearPerson', '2009.12':'Dowrydispute', '2009.13':'Drugabuse/Addiction',
'2009.14':'Failureinexamination','2009.15':'FallinSocialreputation', '2009.16':'Familyproblems',
'2009.17':'Loveaffairs','2009.18':'Poverty', '2009.19':'Propertydispute', '2009.20':'Unemployment',
'2009.21':'Causesnotknown','2009.22':'OtherSuicide', '2009.23':'TotalC'})
elif i == 2010:
df_temp2 = df_temp2.rename(columns={'State':'State','2010.9':'BankruptcyorIndebtness', '2010.10':'Illness',
'2010.11':'DeathofDearPerson', '2010.12':'Dowrydispute', '2010.13':'Drugabuse/Addiction',
'2010.14':'Failureinexamination','2010.15':'FallinSocialreputation', '2010.16':'Familyproblems',
'2010.17':'Loveaffairs','2010.18':'Poverty', '2010.19':'Propertydispute', '2010.20':'Unemployment',
'2010.21':'Causesnotknown','2010.22':'OtherSuicide', '2010.23':'TotalC'})
elif i == 2011:
df_temp2 = df_temp2.rename(columns={'State':'State','2011.9':'BankruptcyorIndebtness', '2011.10':'Illness',
'2011.11':'DeathofDearPerson', '2011.12':'Dowrydispute', '2011.13':'Drugabuse/Addiction',
'2011.14':'Failureinexamination','2011.15':'FallinSocialreputation', '2011.16':'Familyproblems',
'2011.17':'Loveaffairs','2011.18':'Poverty', '2011.19':'Propertydispute', '2011.20':'Unemployment',
'2011.21':'Causesnotknown','2011.22':'OtherSuicide', '2011.23':'TotalC'})
elif i == 2012:
df_temp2 = df_temp2.rename(columns={'State':'State','2012.9':'BankruptcyorIndebtness', '2012.10':'Illness',
'2012.11':'DeathofDearPerson', '2012.12':'Dowrydispute', '2012.13':'Drugabuse/Addiction',
'2012.14':'Failureinexamination','2012.15':'FallinSocialreputation', '2012.16':'Familyproblems',
'2012.17':'Loveaffairs','2012.18':'Poverty', '2012.19':'Propertydispute', '2012.20':'Unemployment',
'2012.21':'Causesnotknown','2012.22':'OtherSuicide', '2012.23':'TotalC'})
elif i == 2013:
df_temp2 = df_temp2.rename(columns={'State':'State','2013.9':'BankruptcyorIndebtness', '2013.10':'Illness',
'2013.11':'DeathofDearPerson', '2013.12':'Dowrydispute', '2013.13':'Drugabuse/Addiction',
'2013.14':'Failureinexamination','2013.15':'FallinSocialreputation', '2013.16':'Familyproblems',
'2013.17':'Loveaffairs','2013.18':'Poverty', '2013.19':'Propertydispute', '2013.20':'Unemployment',
'2013.21':'Causesnotknown','2013.22':'OtherSuicide', '2013.23':'TotalC'})
elif i == 2014:
df_temp2 = df_temp2.rename(columns={'State':'State','2014.9':'BankruptcyorIndebtness', '2014.10':'Illness',
'2014.11':'DeathofDearPerson', '2014.12':'Dowrydispute', '2014.13':'Drugabuse/Addiction',
'2014.14':'Failureinexamination','2014.15':'FallinSocialreputation', '2014.16':'Familyproblems',
'2014.17':'Loveaffairs','2014.18':'Poverty', '2014.19':'Propertydispute', '2014.20':'Unemployment',
'2014.21':'Causesnotknown','2014.22':'OtherSuicide', '2014.23':'TotalC'})
elif i == 2015:
df_temp2 = df_temp2.rename(columns={'State':'State','2015.9':'BankruptcyorIndebtness', '2015.10':'Illness',
'2015.11':'DeathofDearPerson', '2015.12':'Dowrydispute', '2015.13':'Drugabuse/Addiction',
'2015.14':'Failureinexamination','2015.15':'FallinSocialreputation', '2015.16':'Familyproblems',
'2015.17':'Loveaffairs','2015.18':'Poverty', '2015.19':'Propertydispute', '2015.20':'Unemployment',
'2015.21':'Causesnotknown','2015.22':'OtherSuicide', '2015.23':'TotalC'})
df_comp_suicide = df_comp_suicide.append(df_temp2)
df_comp_suicide.sort_values(by=['Year','State'], inplace=True)
df_state_sui = df_comp_suicide
df_comp_suicide = df_comp_suicide.set_index(['Year','State'])
# Suicide statistics by age and gender by State and year is captured in this dataframe
df_state_suicide_age_gender = pd.DataFrame()
df_temp3 = pd.DataFrame()
arr2= ['Year', 'State','Upto14years', 'B1529years', 'B3044years', 'B4559years',
'Above60years', 'Male', 'Female', 'Transgender', 'Total']
years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8']
cols1 = ['State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8']
cols2 = ['State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8']
cols3 = ['State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8']
cols4 = ['State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8']
cols5 = ['State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8']
cols6 = ['State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]
for i, j in zip(years,datacols):
df_temp3 = df_suicide_state[j]
year_df = add_year_column(i,np.arange(len(df_temp3)))
df_temp3.insert(0,'Year',year_df)
if i == 2009:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
elif i == 2010:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
elif i == 2011:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
elif i == 2012:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
elif i == 2013:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
elif i == 2014:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
elif i == 2015:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10]})
df_state_suicide_age_gender = df_state_suicide_age_gender.append(df_temp3)
df_state_suicide_age_gender.sort_values(by=['Year','State'], inplace=True)
df_state_sag = df_state_suicide_age_gender
df_state_suicide_age_gender = df_state_suicide_age_gender.set_index(['Year','State'])
# Suicide statistics by age and gender by city and year is captured in this dataframe
df_suicide_cities_age = pd.read_excel('Statewise-Suicide.xlsx', sheet_name=sheet_list[2], skiprows=c)
df_city_suicide_age_gender = pd.DataFrame()
df_temp3 = pd.DataFrame()
arr2= ['Year', 'City', 'State','Upto14years', 'B1529years', 'B3044years', 'B4559years',
'Above60years', 'Male', 'Female', 'Transgender', 'Total']
years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['City','State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8']
cols1 = ['City','State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8']
cols2 = ['City','State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8']
cols3 = ['City','State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8']
cols4 = ['City','State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8']
cols5 = ['City','State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8']
cols6 = ['City','State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]
for i, j in zip(years,datacols):
df_temp3 = df_suicide_cities_age[j]
year_df = add_year_column(i,np.arange(len(df_temp3)))
df_temp3.insert(0,'Year',year_df)
if i == 2009:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
elif i == 2010:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
elif i == 2011:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
elif i == 2012:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
elif i == 2013:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
elif i == 2014:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
elif i == 2015:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(columns=
{arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11]})
df_city_suicide_age_gender = df_city_suicide_age_gender.append(df_temp3)
df_city_suicide_age_gender.sort_values(by=['Year','State','City'], inplace=True)
df_city_sag = df_city_suicide_age_gender
df_city_suicide_age_gender = df_city_suicide_age_gender.set_index(['Year','State','City'])
# Suicide statistics by city for causes only by year
df_suicide_cities_causes = pd.read_excel('Statewise-Suicide.xlsx', sheet_name=sheet_list[3], skiprows=d)
#City Wise Suicide Causes for all the years
df_city_suicide_causes = pd.DataFrame()
df_temp3 = pd.DataFrame()
arr2= ['Year', 'City', 'State',
'BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide',
'TotalC']
years = [2009,2010,2011,2012,2013,2014,2015]
cols0 = ['City','State',2009,'2009.1', '2009.2', '2009.3', '2009.4', '2009.5', '2009.6', '2009.7','2009.8',
'2009.9', '2009.10', '2009.11', '2009.12', '2009.13', '2009.14']
cols1 = ['City','State',2010,'2010.1', '2010.2', '2010.3', '2010.4', '2010.5', '2010.6', '2010.7','2010.8',
'2010.9', '2010.10', '2010.11', '2010.12', '2010.13', '2010.14']
cols2 = ['City','State',2011,'2011.1', '2011.2', '2011.3', '2011.4', '2011.5', '2011.6', '2011.7','2011.8',
'2011.9', '2011.10', '2011.11', '2011.12', '2011.13', '2011.14']
cols3 = ['City','State',2012,'2012.1', '2012.2', '2012.3', '2012.4', '2012.5', '2012.6', '2012.7','2012.8',
'2012.9', '2012.10', '2012.11', '2012.12', '2012.13', '2012.14']
cols4 = ['City','State',2013,'2013.1', '2013.2', '2013.3', '2013.4', '2013.5', '2013.6', '2013.7','2013.8',
'2013.9', '2013.10', '2013.11', '2013.12', '2013.13', '2013.14']
cols5 = ['City','State',2014,'2014.1', '2014.2', '2014.3', '2014.4', '2014.5', '2014.6', '2014.7','2014.8',
'2014.9', '2014.10', '2014.11', '2014.12', '2014.13', '2014.14']
cols6 = ['City','State',2015,'2015.1', '2015.2', '2015.3', '2015.4', '2015.5', '2015.6', '2015.7','2015.8',
'2015.9', '2015.10', '2015.11', '2015.12', '2015.13', '2015.14']
datacols = [cols0, cols1,cols2,cols3, cols4, cols5, cols6]
for i, j in zip(years,datacols):
df_temp3 = df_suicide_cities_causes[j]
year_df = add_year_column(i,np.arange(len(df_temp3)))
df_temp3.insert(0,'Year',year_df)
if i == 2009:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
elif i == 2010:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
elif i == 2011:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
elif i == 2012:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
elif i == 2013:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
elif i == 2014:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
elif i == 2015:
arr = df_temp3.columns
df_temp3 = df_temp3.rename(
columns={arr[0]: arr2[0], arr[1]: arr2[1],arr[2]: arr2[2],arr[3]: arr2[3],arr[4]: arr2[4], arr[5]: arr2[5],
arr[6]: arr2[6],arr[7]: arr2[7],arr[8]: arr2[8],arr[9]: arr2[9],arr[10]: arr2[10],
arr[11]: arr2[11], arr[12]: arr2[12], arr[13]: arr2[13], arr[14]: arr2[14], arr[15]: arr2[15],
arr[16]: arr2[16], arr[17]: arr2[17]})
df_city_suicide_causes = df_city_suicide_causes.append(df_temp3)
df_city_suicide_causes.sort_values(by=['Year','State','City'], inplace=True)
df_city_sui = df_city_suicide_causes
df_city_suicide_causes = df_city_suicide_causes.set_index(['Year','State','City'])
#print(df_city_suicide_causes.head(5))
#dataframes from natural and unnatural causes are mergerd with the suicide cases dataframe on the states statistics
df_comp_states = df_complete.loc[df_complete['Year'] > 2008]
df_comp_states=df_comp_states.set_index(['Year','State'])
df_comp_states.sort_values(by=['Year','State'], inplace=True)
df_all_states_causes = pd.merge(df_comp_states,
df_comp_suicide[['BankruptcyorIndebtness', 'Illness', 'DeathofDearPerson',
'Dowrydispute', 'Drugabuse/Addiction', 'Failureinexamination',
'FallinSocialreputation', 'Familyproblems', 'Loveaffairs', 'Poverty',
'Propertydispute', 'Unemployment', 'Causesnotknown', 'OtherSuicide','TotalC']],
on=['Year','State'], how='inner')
#join regular causes dataframe with suicide dataframe
df_allcauses_year = pd.merge(df_accident_naun, df_suicide_year, left_on='Year', right_on='Year')
all_causes_year = pd.pivot_table(df_allcauses_year, index='Year', values=['NTotal', 'UnTotal', 'TotalC'])
def plot_stacked_bar_overall(df_natural, statename):
df_exp2 = df_natural.loc[df_natural['State']==statename]
fig = go.Figure(data=[
go.Bar(name='NaturalCauses', y= df_exp2['Year'], x=df_exp2['NTotal'],
marker_color='palevioletred',orientation='h'),
go.Bar(name='UnNaturalCauses', y= df_exp2['Year'], x=df_exp2['UnTotal'],
marker_color='dimgrey',orientation='h')
])
fig.update_layout(title='Natural and Unnatural Accidents',
xaxis_tickfont_size=14,
yaxis=dict(title='Overall Count of Accidents',
titlefont_size=16,
tickfont_size=14),
legend=dict(xanchor='right', yanchor='top', bgcolor='rgba(255, 255, 255, 0)',
bordercolor='rgba(255, 255, 255, 0)'),
barmode='stack')
fig.show()
plot_stacked_bar_overall(df_complete,'WB')
#The choropleth map is used to evidently show the statistics differentiation for the years before 2014 and after 2014,
#when the state TS originated in Indian States set. Only Natural and Unnatural causes are plotted. When combined with Suicide,
#no major variation of data could be identified from the graph
def plot_choropleth_map(df_natural,year):
pd.options.mode.chained_assignment = None
with open("india_states-2013.json") as jsonfile1:
states_2013 = json.load(jsonfile1)
with open("india_states-2014-2015.json") as jsonfile2:
states_2014_2015 = json.load(jsonfile2)
if year < 2014:
df_data = df_natural.loc[(df_natural['Year']==year) & (df_natural['State'] != 'TS')]
states = states_2013
else:
df_data = df_natural.loc[df_natural['Year']==year]
states = states_2014_2015
df_data['GrandTotal']=df_data.NTotal + df_data.UnTotal
arr = df_data[['State', 'GrandTotal']]
fig1 = px.choropleth(arr, geojson=states, locations='State', color='GrandTotal',
color_continuous_scale="Viridis",
range_color=(0, 30000),
scope="asia",
labels={'GrandTotal':'Total'}
)
fig1.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, title= 'Accident Statistics for the Country')
fig1.show()
plot_choropleth_map(df_complete,2008)